import requests
from bs4 import  BeautifulSoup
import tool

# Scrape data from the Zhihu website.

# Fetching Zhihu's hot-topics list requires login; to be attempted later.


def get_url():
    """Return the URL of the Zhihu hot-topics (热榜) page."""
    return 'https://www.zhihu.com/hot'


def parse_html(db_conn, db_cur, url):
    """Fetch the Zhihu hot-list page at *url* and print its raw HTML.

    Args:
        db_conn: open database connection (currently unused; reserved for
            storing parsed results later).
        db_cur: cursor belonging to *db_conn* (currently unused).
        url: address of the page to fetch.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
        # BUG FIX: the comma after 'close' was missing, which made the two
        # string literals concatenate ('close' 'cookies') and turned this
        # dict literal into a SyntaxError.
        'Connection': 'close',
        # BUG FIX: the HTTP request header carrying cookies is 'Cookie'
        # (singular, capitalized) — 'cookies' is not recognized by servers,
        # so the login session would never be sent.
        'Cookie': '_zap=7c81e1ab-7638-4d89-a8df-7f4f73d5d318; _xsrf=kBJPeG3JoKt8RyDfotQ5tumQD56YX6TX; d_c0="ABDil0kyuA6PTjVQ7IwneotUmWxXWnJH-rU=|1545659382"; capsion_ticket="2|1:0|10:1545659451|14:capsion_ticket|44:ZGZmZTBhNDBmYzJiNDY3ODgxYmUxMjViOWVmOTBiOTM=|08667171cafa1b1e4dc87a108af9cb4fafcdb1a5142070981ee56def83292c45"; z_c0="2|1:0|10:1545659477|4:z_c0|92:Mi4xZW1vbkRRQUFBQUFBSUNLQ1NUSzREaVlBQUFCZ0FsVk5WVElPWFFBSm1EN1JRUHZlUzFiYTdwcWJIY3haOXZxQjFR|ec049be6c582d5a0d25bf8c6df84b9182a25f15df13b47bec1bf2b0850e9fca1"; q_c1=d9dbf3826c314a8cb47c0ea608032745|1545659479000|1545659479000; __utmv=51854390.100--|2=registration_date=20181113=1^3=entry_date=20181113=1; tst=h; __utma=51854390.459793879.1545659575.1545659575.1547120931.2; __utmb=51854390.0.10.1547120931; __utmc=51854390; __utmz=51854390.1547120931.2.2.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/hot; tgw_l7_route=060f637cd101836814f6c53316f73463'
    }
    # Fetch the hot-list page; only print the body on success instead of
    # blindly printing whatever (possibly an error page) came back.
    response = requests.get(url=url, headers=headers)
    if response.status_code == 200:
        print(response.text)
    else:
        print('数据获取失败')


if __name__ == '__main__':
    # Set up the database connection and cursor, then fetch the page.
    connection = tool.get_connect()
    cursor = tool.get_cursor(connection)
    parse_html(connection, cursor, get_url())
